This session includes examples of accessing GBIF data from R using the rgbif package from rOpenSci


Choose a species name

## Pick one species and resolve its GBIF backbone speciesKey.
## library() (not require()) so that a missing rgbif installation stops here
## with a clear error instead of failing later at name_backbone().
library(rgbif) # R package for GBIF data

## Active choice: liverleaf (blaaveis:no), taxonKey=5371699
sp_name <- "Hepatica nobilis"
kingdom <- "Plantae"

## Alternatives -- uncomment exactly one line to switch species:
#sp_name <- "Hordeum vulgare"; kingdom <- "Plantae" # barley (bygg:no)
#sp_name <- "Pinus sylvestris L"; kingdom <- "Plantae" # scots pine (furu:no), taxonKey=5285637
#sp_name <- "Picea abies (L.) H. Karst"; kingdom <- "Plantae" # Norway spruce (gran:no), taxonKey=5284884
#sp_name <- "Juniperus communis L."; kingdom <- "Plantae" # common juniper (einer:no), taxonKey=2684709
#sp_name <- "Salmo trutta"; kingdom <- "Animalia" # trout (oerret:no)
#sp_name <- "Parus major Linnaeus"; kingdom <- "Animalia" # great tit (kjoettmeis:no), taxonKey=8095051
#sp_name <- "Cycladophora davisiana Ehrenberg"; kingdom <- "Chromista" # radiolaria sp., taxonKey=5955869

## Match the name against the GBIF backbone and keep only the speciesKey field.
key <- name_backbone(name = sp_name, kingdom = kingdom)$speciesKey

You may also use a higher-level taxonomic group (identified by its taxonKey)

## Resolve the taxonKey of a higher-rank taxon by searching the GBIF backbone.
nub <- "d7dddbf4-2cf0-4f39-9b2a-bb099caae36c" # GBIF NUB taxon backbone datasetKey
sp_name <- NULL # reset any previous choice before picking a group below

## Uncomment exactly one name/rank pair:
#sp_name <- "Plantae";      rank <- "KINGDOM" # plants, taxonKey=6
#sp_name <- "Tracheophyta"; rank <- "PHYLUM"  # vascular plants, taxonKey=7707728
sp_name <- "Poaceae" # grasses, taxonKey=3073
rank <- "FAMILY"
#sp_name <- "Aves";         rank <- "CLASS"   # birds, taxonKey=212

## limit = 1 asks for a single hit; its key is used as the taxonKey
backbone_hit <- name_lookup(
  query = sp_name,
  rank = rank,
  datasetKey = nub,
  limit = 1
)
key <- backbone_hit$data$key

Choose a rank among: CLASS, CULTIVAR, CULTIVAR_GROUP, DOMAIN, FAMILY, FORM, GENUS, INFORMAL, INFRAGENERIC_NAME, INFRAORDER, INFRASPECIFIC_NAME, INFRASUBSPECIFIC_NAME, KINGDOM, ORDER, PHYLUM, SECTION, SERIES, SPECIES, STRAIN, SUBCLASS, SUBFAMILY, SUBFORM, SUBGENUS, SUBKINGDOM, SUBORDER, SUBPHYLUM, SUBSECTION, SUBSERIES, SUBSPECIES, SUBTRIBE, SUBVARIETY, SUPERCLASS, SUPERFAMILY, SUPERORDER, SUPERPHYLUM, SUPRAGENERIC_NAME, TRIBE, UNRANKED, VARIETY

Species occurrence data from GBIF

## Download occurrence records for Hepatica nobilis from GBIF.
## library() (not require()) so that a missing rgbif installation stops here
## with a clear error instead of failing at occ_search().
library(rgbif) # R package for GBIF data

## Ask for at most 100 records that have coordinates, and keep only the
## `data` element of the search result (the table of records).
sp <- occ_search(
  scientificName = "Hepatica nobilis",
  hasCoordinate = TRUE,
  limit = 100
)$data
Map of Hepatica nobilis (taxonKey=5371699) using gbifmap()
Map of Hepatica nobilis (taxonKey=5371699) using gbifmap()

Preview of dataframe with search results

## Show the first five occurrence records of the search result
utils::head(x = sp, n = 5L)
Preview of dataframe for Hepatica nobilis
Preview of dataframe for Hepatica nobilis

Extract coordinates suitable for e.g. Maxent

## Build the coordinate tables from the occurrence records in `sp`.
coord_cols <- c("decimalLongitude", "decimalLatitude")

## Coordinates only
xy <- sp[coord_cols]

## Species name followed by longitude and latitude: the input layout for Maxent
sp_xy <- sp[c("species", coord_cols)]

# structure(sp_xy) ## preview the list of coordinates
utils::head(x = sp_xy, n = 5L) ## preview first 5 records
Preview of sp-x-y data extracted for use with Maxent etc.
Preview of sp-x-y data extracted for use with Maxent etc.

Write dataframe to file (useful for Maxent etc.)

## Export is disabled here; uncomment to (re)write the tab-separated file.
#write.table(sp_xy, file="./gbif_demo/sp_xy.txt", sep="\t", row.names=FALSE, qmethod="double") ## for Maxent

## Show the first 10 lines of the exported file.
## NOTE(review): with the write.table() call commented out, this only works
## if ./gbif_demo/sp_xy.txt already exists from an earlier run.
readLines(con = "./gbif_demo/sp_xy.txt", n = 10L)
#readChar("./gbif_demo/sp_xy.txt", file.info("./gbif_demo/sp_xy.txt")$size) ## Alternative preview
Preview the exported data-file, sp_xy.txt
Preview the exported data-file, sp_xy.txt

Read data file back into R

#rm(sp_xy) ## remove the data frame sp_xy from the R workspace before re-loading it from file
#sp_xy <- read.delim("./gbif_demo/sp_xy.txt", header=TRUE, dec=".", stringsAsFactors=FALSE)
#head(sp_xy, n=5) ## preview first 5 records

GBIF data from Norway

gbifmap for Norway, Hepatica nobilis
gbifmap for Norway, Hepatica nobilis

GBIF data from Trondheim (or another bounding box)

Preview data frame

## Show the first five rows of the bounding-box result.
## NOTE(review): sp_bb and sp_bb_m are not created in the code shown here;
## presumably they come from a hidden chunk -- confirm.
#head(sp_bb, n=5) ## preview first 5 records
utils::head(x = sp_bb_m, n = 5L)
Preview dataframe of results from GBIF using bounding box
Preview dataframe of results from GBIF using bounding box

Mapping with the Leaflet package

## The mapr package (and its map_leaflet() helper) is archived, so the old
## calls are kept below only for reference.
#library("mapr") # rOpenSci r-package for mapping (occurrence data) # archived 2023
#library("spocc") # rOpenSci r-package with more biodiversity data sources than GBIF
#map_leaflet(sp_bb_m, "decimalLongitude", "decimalLatitude", size=2, color="blue")
#sp_bb_L <- sp_bb_m
#names(sp_bb_L)[names(sp_bb_L) == "decimalLatitude"] <- "lat"
#names(sp_bb_L)[names(sp_bb_L) == "decimalLongitude"] <- "lng"

## map_leaflet() is deprecated --> use leaflet() directly
library(leaflet) # expects lat / lng arguments

## Build the map step by step: empty widget, base tiles, then one marker
## per record with the record's `name` value as popup text.
trondheim_map <- leaflet()
trondheim_map <- addTiles(trondheim_map)
addMarkers(
  trondheim_map,
  lng = sp_bb_m$decimalLongitude,
  lat = sp_bb_m$decimalLatitude,
  popup = sp_bb_m$name
)
##
Map GBIF data with bounding box for Trondheim
Map GBIF data with bounding box for Trondheim

Make a simple map of 4 spring flower species (in Norway) <– ERROR

## Map four spring flowers recorded in Norway:
## liverleaf, wood anemone, dandelion, red clover
library(plyr)    # ldply(): combine a list of data frames into one data frame
library(leaflet) # interactive map; replaces the archived mapr::map_leaflet()

spp_names <- c("Hepatica nobilis", "Anemone nemorosa", "Taraxacum officinale", "Trifolium pratense")

## Resolve one GBIF backbone speciesKey per name. lapply() + unlist() keeps the
## key type returned by rgbif; the length check fails loudly if any name had no
## match (a NULL key would otherwise be dropped silently).
keys <- unlist(
  lapply(spp_names, function(x) name_backbone(name = x, kingdom = "plants")$speciesKey),
  use.names = FALSE
)
stopifnot(length(keys) == length(spp_names))

## The `return = "data"` argument of occ_search() is defunct, and a search with
## several taxonKeys does not have a single top-level `$data` element. Run one
## search per key instead, so each result has its own `$data` table.
spp <- lapply(keys, function(k) {
  occ_search(taxonKey = k, limit = 100, country = "NO", hasCoordinate = TRUE)
})
names(spp) <- spp_names

## Pull out the record tables, drop species without any records, and stack the
## rest. ldply() stores the list names (the queried species) in `query_name`.
spp_data <- Filter(Negate(is.null), lapply(spp, function(res) res$data))
spp_df <- ldply(spp_data, .id = "query_name")
if (nrow(spp_df) == 0) {
  stop("GBIF returned no occurrence records for the requested species", call. = FALSE)
}
spp_df$query_name <- as.character(spp_df$query_name)

## Keep the columns used for mapping. intersect() skips columns that this
## particular GBIF response does not contain instead of raising
## "undefined columns selected".
wanted_cols <- c(
  "query_name", "name", "species", "decimalLongitude", "decimalLatitude",
  "basisOfRecord", "year", "municipality"
)
spp_m <- spp_df[intersect(wanted_cols, names(spp_df))]

## One colour per queried species, in the same order as spp_names.
cols <- c("blue", "#dddddd", "yellow", "red")
spp_pal <- colorFactor(palette = cols, domain = NULL, levels = spp_names)

leaflet(spp_m) %>%
  addTiles() %>%
  addCircleMarkers(
    lng = ~decimalLongitude,
    lat = ~decimalLatitude,
    radius = 3,
    color = ~spp_pal(query_name),
    popup = ~query_name
  ) %>%
  addLegend(pal = spp_pal, values = ~query_name, title = "Species")
###
### --> TODO
###
Spring flowers (Hepatica nobilis, Anemone nemorosa, Taraxacum officinale, Trifolium pratense)
Spring flowers (Hepatica nobilis, Anemone nemorosa, Taraxacum officinale, Trifolium pratense)


Expand color-ramp when mapping many species

Note that colors become hard to distinguish when the number of species is high. Standard color-ramps include only 9-12 colors.

## Poaceae has taxonKey=3073 - which gives us multiple species (here 31 unique "names", 33 unique "taxonKey")

## Bounding box around Trondheim, passed to occ_data() as `geometry`
## (values look like min longitude, min latitude, max longitude, max latitude)
bb_t <- c(10.2, 63.3, 10.6, 63.5)

## occ_search(..., return = "data") is defunct, so occ_data() is used instead:
#spp_t <- occ_search(taxonKey='3073', limit=100, return='data', country='NO', geometry=bb_t, hasCoordinate=TRUE) ## ERROR return param is defunct --> occ_data()
spp_t <- occ_data(
  taxonKey = "3073",
  limit = 100,
  country = "NO",
  geometry = bb_t,
  hasCoordinate = TRUE
)

## Keep only the columns needed for mapping
map_cols_t <- c(
  "name", "decimalLongitude", "decimalLatitude",
  "basisOfRecord", "year", "municipality", "taxonKey"
)
spp_t_m <- spp_t$data[map_cols_t]

Plot on map

## Map many taxa at once, giving each unique `name` its own colour.
## mapr::map_leaflet() is no longer available (mapr is archived), so the colour
## ramp is applied through leaflet's own colorFactor() instead.
## The default colour ramp (Set1) has only 9 colours and triggers a warning
## when more than 9 species are shown, hence the interpolated palette below.
##library('mapr') # rOpenSci r-package for mapping (occurrence data) --> ERROR mapr deprecated
##library('spocc') # rOpenSci r-package with more biodiversity data sources than GBIF --> ERROR deprecated
##library('plotly') --> ERROR plotly deprecated
library("RColorBrewer")
library("leaflet")

## Number of unique taxa in the data frame (count by `name`, NOT by `taxonKey`)
n_spp <- length(unique(spp_t_m$name))

## Stretch the 11-colour "Spectral" palette to n_spp colours
myColors <- colorRampPalette(brewer.pal(11, "Spectral"))(n_spp)
#myColors <- colorRampPalette(brewer.pal(9,"Set1"))(n_spp) # create color palette with [n_spp] colors
#myColors <- rainbow(length(unique(spp_t_m$name))) # create color palette with [n_spp] colors

## Map each taxon name to one of the colours; records without a name fall back
## to colorFactor()'s NA colour.
taxon_pal <- colorFactor(
  palette = myColors,
  domain = NULL,
  levels = sort(unique(spp_t_m$name))
)

## Circle markers are used because they can be coloured per record, which the
## default pin markers cannot.
leaflet(spp_t_m) %>%
  addTiles() %>%
  addCircleMarkers(
    lng = ~decimalLongitude,
    lat = ~decimalLatitude,
    radius = 5,
    color = ~taxon_pal(name),
    popup = ~name
  )
Map with multiple species, expanded color-ramp
Map with multiple species, expanded color-ramp

Diverse color palettes

## Draw the nine colours of the ColorBrewer "Set1" palette
library("RColorBrewer")
#display.brewer.all()
display.brewer.pal(
  n = 9,
  name = "Set1"
)
colorBrewer Set1
colorBrewer Set1

Read more about color palettes in R at https://www.r-bloggers.com/palettes-in-r/


Further reading